In this markdown, we will analyse the networks of TF/target gene cis-regulatory interactinos generated using ANANSE.
We will load these networks as tabular data that we will transform into igraph graph objects, that we can parse and annotate using our information on TF classes and other similar functional annotation that we have carried out over the course of this project.
library(DESeq2)
library(limma)
library(ComplexHeatmap)
library(circlize)
library(ggplot2)
library(igraph)
source("code/r_code/r_functions/sourcefolder.R")
sourceFolder(
"code/r_code/r_functions",
recursive = TRUE
)
sourceFolder(
"code/r_code/r_general",
recursive = TRUE
)
We will need some of the information we have generated in our previous analyses such as stage-specific cluster information and Transcription factor annotation.
# RNA clusters
load("outputs/rda/stage_specific_clusters.rda")
# Transcription Factor information
load("outputs/rda/TF_annotation.rda")
Here we will load all the remaining information of our interest and we will transform it into a format compliant to some of the functions we have made to annotate the graphs; basically, tabular data with gene id <–> annotation category.
# Transcription factors
allTFclasses_col <- c(topclasses_col,otherclasses_col)
pfla_tfs_graph_analysis <-
merge(
pfla_tfs,
data.frame(
TFclass =
c(names(topclasses_col),names(otherclasses_col)),
col =
c(topclasses_col,otherclasses_col)
),
by.x = 2,
by.y = 1
)[,c(2,1,3)]
colnames(pfla_tfs_graph_analysis) <- c("id","TFclass","col")
# Effector genes
pfla_tfeg <- data.frame(
id = allgenes,
TFEG = ifelse(allgenes %in% pfla_tfs$id, "TF", "EG")
)
# Functional categories (COG)
pfla_funcat <- read.table(
"/home/ska/aperpos/projects/ptychodera_cisreg_development/outputs/functional_annotation/COGs/pfla_cogs.tsv",
col.names = c("id","funcat")
)
# Trans-developmental / housekeeping
pfla_td_nohk <-
unique(
read.table(
"outputs/functional_annotation/TD_annotation/transdev_expanded_nohk.tsv",
header=F,
stringsAsFactors = F)[,1]
)
pfla_hk_notd <-
unique(
read.table(
"outputs/functional_annotation/TD_annotation/housekeep_expanded_notd.tsv",
header=F,
stringsAsFactors=F)[,1]
)
pfla_tdhk <- data.frame(
id = allgenes,
TDHK =
ifelse(allgenes %in% pfla_td_nohk, "td",
ifelse(allgenes %in% pfla_hk_notd, "hk",
"none")
)
)
Finally, we add them into a list that our function for annotation will iterate through. We will call this the list of attributes
pfla_attributes_list <-
list(
pfla_tfeg,
pfla_tfs_graph_analysis,
pfla_funcat,
pfla_tdhk
)
We will also load the GO annotation of Ptychodera to use it later on in the functions.
#gene universe
gene_universe <- allgenes
# gene-GO mappings
pfla_id_GO <-
readMappings(
"outputs/functional_annotation/go_blast2go/GO_annotation.txt"
)
Likewise, we will also load the EggNOG annotation of gene names for more clarity in the plots of the graphs.
pfla_genenames <-
read.delim2(
file = "outputs/functional_annotation/eggnog/emapper.annotations",
skip = 3,
header = TRUE
)[,c(1,5,13)]
pfla_genenames <- pfla_genenames[pfla_genenames$predicted_gene_name != "",]
Here we will use the function LoadNetworkData that reads the .network file replacing instances of em dash and other ANANSE-related quirks. The resulting dataframe has three columns: “tf” (transcription factor), “tg” (target gene), and “prob” (probability). We manually add back underscores to the gene ids because ANANSE required the gene ids in a different format.
We do this for both the Early Blastula (EB) and Late Gastrula (LG) networks.
# EARLY BLASTULA
pfla_EB_nw <- LoadNetworkData("outputs/ananse/pfla_EB.network")
## Warning in readLines(x): incomplete final line found on
## 'outputs/ananse/pfla_EB.network'
pfla_EB_nw$tf <- gsub("TCONS", "TCONS_", pfla_EB_nw$tf)
pfla_EB_nw$tg <- gsub("TCONS", "TCONS_", pfla_EB_nw$tg)
# LATE GASTRULA
pfla_LG_nw <- LoadNetworkData("outputs/ananse/pfla_LG.network")
pfla_LG_nw$tf <- gsub("TCONS", "TCONS_", pfla_LG_nw$tf)
pfla_LG_nw$tg <- gsub("TCONS", "TCONS_", pfla_LG_nw$tg)
Here it is what it looks like.
head(pfla_EB_nw)
## tf tg prob
## 1: TCONS_00000165 TCONS_00000001 0.13640409
## 2: TCONS_00000165 TCONS_00000003 0.11046195
## 3: TCONS_00000165 TCONS_00000004 0.10880337
## 4: TCONS_00000165 TCONS_00000006 0.09827907
## 5: TCONS_00000165 TCONS_00000007 0.09742769
## 6: TCONS_00000165 TCONS_00000008 0.17221461
The first thing we will do is filter the network and keep only those interactions whose values are in the top 5% percentile. We will use this later on to quantify and measure the In- and Out-Degree of each gene, among other things.
pfla_EB_nw2 <- FilterNetwork(pfla_EB_nw,q=0.95)
## [1] "Original network is 35854 genes. Filtered network is 8256 genes."
We generate an igraph object using our custom wrapper GenerateNetwork that also filter by the same number of interactions, since we provide q = 0.95.
pfla_EB_graph <- GenerateNetwork(pfla_EB_nw,q = 0.95)
Using our list of attributes, we add this information to all the pertinent nodes (== genes) of our network using our ParseNetwork function.
pfla_EB_parsenetwork <- ParseNetwork(pfla_EB_graph, pfla_attributes_list)
The resulting object can be subsetted to extract the new network with attributes on one object and the data frame of attributes on the other.
pfla_EB_graph2 <- pfla_EB_parsenetwork[[1]]
pfla_EB_df_attr <- pfla_EB_parsenetwork[[2]]
The data frame of attributes is a data.frame object sorted in the same order as nodes are indexed in the igraph object. The content of this data frame is, for every node that has an attribute, a row with the id of this node, the index of this node, and a bunch of attributes (color, whether it is a trans-developmental gene, TF class, functional category, etc.)
head(pfla_EB_df_attr)
## id index TFEG TFclass col funcat TDHK
## 1 TCONS_00001049 1 TF T-box #f38d97 K none
## 2 TCONS_00001141 2 TF C2H2_ZF #ffebb5 K none
## 3 TCONS_00001174 3 TF Runt lightgoldenrod2 Z none
## 4 TCONS_00001304 4 TF C2H2_ZF #ffebb5 O td
## 5 TCONS_00001709 5 TF Homeodomain #5a5a82 none
## 6 TCONS_00001951 6 TF Pou chartreuse2 K none
Finally, we use our wrapper NetworkStats.
This wrapper performs a number of calculations using our igraph object and the tabular data frame, including also the gene ontology of target genes, and also reports the top central genes, the centrality of different kinds of genes by attribute (e.g. centrality of genes by TF class).
The inputs of the function are:
The function outputs a list containing various statistics and metrics of the network, such as the number of genes, the number of active TFs, the top emitters and receivers of the network, the number of self-regulated TFs, the size of the connected components of the graph, the centrality of the TFs and the functional categories, and the gene ontology information.
pfla_EB_stats <- NetworkStats(pfla_EB_nw2, pfla_EB_graph2, pfla_EB_df_attr, N = 10, gene_universe = gene_universe, id2go = pfla_id_GO)
## [1] "Basic Stats"
## [1] "Components"
## [1] "13"
## [1] "Centrality and category-based metrics"
## [1] "Centrality"
## [1] "Centrality of TFs"
## [1] "Centrality of Trans-Devs"
## [1] "Edges of Functional Categories"
## [1] "Centrality of Functional Categories"
## [1] "Getting Gene Ontology"
## [1] "Starting analysis 1 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Starting analysis 2 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Starting analysis 3 of 5"
## Warning in getSigGroups(object, test.stat): No enrichment can pe performed -
## there are no feasible GO terms!
## [1] "Starting analysis 4 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Starting analysis 5 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Generating output"
Next we do the same for the Late Gastrula Network. We start by filtering the Network:
pfla_LG_nw2 <- FilterNetwork(pfla_LG_nw,q=0.95)
## [1] "Original network is 35854 genes. Filtered network is 8543 genes."
Then we also generate an igraph network using the same percentile.
pfla_LG_graph <- GenerateNetwork(pfla_LG_nw,q = 0.95)
We parse this graph using our list of attributes
pfla_LG_parsenetwork <- ParseNetwork(pfla_LG_graph, pfla_attributes_list)
pfla_LG_graph2 <- pfla_LG_parsenetwork[[1]]
pfla_LG_df_attr <- pfla_LG_parsenetwork[[2]]
And finally we calculate the stats and metrics of this network:
pfla_LG_stats <- NetworkStats(pfla_LG_nw2, pfla_LG_graph2, pfla_LG_df_attr, N = 10, gene_universe = gene_universe, id2go = pfla_id_GO)
## [1] "Basic Stats"
## [1] "Components"
## [1] "24"
## [1] "Centrality and category-based metrics"
## [1] "Centrality"
## [1] "Centrality of TFs"
## [1] "Centrality of Trans-Devs"
## [1] "Edges of Functional Categories"
## [1] "Centrality of Functional Categories"
## [1] "Getting Gene Ontology"
## [1] "Starting analysis 1 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Starting analysis 2 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Starting analysis 3 of 5"
## Warning in getSigGroups(object, test.stat): No enrichment can pe performed -
## there are no feasible GO terms!
## [1] "Starting analysis 4 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Starting analysis 5 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Generating output"
We will have a look at the output of these wrappers a bit later. We will now compare these two networks using the NetworkStats and the filtered tab-format networks that we have generated.
Part of this comparison involves defining a subset of genes of interest, which we load here.
genes_postgr <-
rownames(pfla_rna_dev)[
pfla_rna_dev$cID > 12
]
And another part is loading the results of running ANANSE influence, which takes into account differential gene expression to infer the responsible factors for changes in the networks of cis-regulatory interactions between two states (in our case, the transition from early blastula to late gastrula).
ananse_EB_to_LG <-
read.table("/home/ska/aperpos/Def_Pfla/outputs/ananse/20210909_ANANSE_EG_EB",header=T) #change this path
ananse_EB_to_LG$factor <-
sub("TCONS","TCONS_",ananse_EB_to_LG$factor)
The compareNetworks wrapper compares two networks, nw_a and nw_b, and their respective graphs, graph_a and graph_b, and outputs various metrics, visualizations, and data frames.
The function requires the following arguments:
nw_a: A data frame or network object for network A.nw_b: A data frame or network object for network B.graph_a: A graph object for network A.graph_b: A graph object for network B.stats_a: A list of statistics for network A. Generated using NetworkStats() as explained above.stats_b: A list of statistics for network B. Generated using NetworkStats() as explained above.influence: if present, it outputs subgraphs of, and visualisations of, the top factors explaining the transition (as estimated by ANANSE influence).name_network_a: The name of network A. The default is “a”.name_network_b: The name of network B. The default is “b”.col_a: The color of network A in the Venn diagram. The default is “darkorange”.col_b: The color of network B in the Venn diagram. The default is “purple”.geneset_interest: if present, it compares the percentage of a set of genes of interest as targets in each of the networks.top: The proportion of top targets in each network to consider. The default is 0.9.tfs: Not used in the function.gene_universe: A vector of all genes.id2go: A data frame containing gene ontology (GO) annotations.The function performs the following tasks:
pdf(file = "graphics/EB_LG_comparenetworks.pdf",he=8,wi=10)
EB_vs_LG <- compareNetworks(
name_network_a = "EB",
name_network_b = "LG",
nw_a = pfla_EB_nw2,
nw_b = pfla_LG_nw2,
top = 0.9,
stats_a = pfla_EB_stats,
stats_b = pfla_LG_stats,
graph_a = pfla_EB_graph2,
graph_b = pfla_LG_graph2,
influence = ananse_EB_to_LG,
tfs = pfla_tfs_graph_analysis,
geneset_interest = genes_postgr,
id2go = pfla_id_GO,
gene_universe = gene_universe,
col_a = "#efaa90",
col_b = "#fbcf99"
)
## [1] "Defining exclusive and common targets"
## [1] "getting GO terms"
## [1] "Starting analysis 1 of 3"
## [1] "Starting analysis 2 of 3"
## [1] "Starting analysis 3 of 3"
## [1] "Dataframe of various metrics"
## [1] "Gene Behaviour across networks"
## [1] "Gene centrality of ALL common genes across networks"
## [1] "Gene centrality of TFs across networks"
## [1] "Gene centrality of trans-dev genes across networks"
## [1] "Graph generation - TF genes"
## [1] "Graph generation - Trans-dev genes"
## [1] "Plots"
## [1] "Mean connections per TF gene"
## [1] "Number of self-regulating TFs"
## [1] "connections per target gene"
## [1] "Venn Diagram Plot"
## [1] "Number of genes of interest in targets"
## [1] "Gene Behaviour plot"
## [1] "Gene Behaviour Box plot"
## [1] "Changes in centrality for ALL genes"
## [1] "Changes in centrality for TF genes"
## [1] "Foldchange in centrality of TFs"
## [1] "graph plots of TFs that change"
## [1] "Changes in trans-dev centrality across networks"
## [1] "Foldchange % emitted connections per gene"
## [1] "graph plots of trans-dev that change"
## [1] "Influence"
## [1] "Influence graph"
## Warning in length(vattrs[[name]]) <- vc: length of NULL cannot be changed
## [1] "Plot influence graph"
dev.off()
## png
## 2
We can use the wrapper NetworkPlots to generate ALL of these plots together. The CompareNetworks wrapper will also generate plots into a file if we run the whole function inside a call to a pdf device, as done above.
pdf(file = "graphics/EB_graph_plots.pdf",he=20,wi=20)
NetworkPlots(
stats = pfla_EB_stats,
nw = pfla_EB_nw,
tfcol = allTFclasses_col,
layout = TRUE,
pdf = F
)
dev.off()
## png
## 2
pdf(file = "graphics/LG_graph_plots.pdf",he=20,wi=20)
NetworkPlots(
stats = pfla_LG_stats,
nw = pfla_LG_nw,
tfcol = allTFclasses_col,
layout = TRUE,
pdf = F
)
dev.off()
## png
## 2
Although these plots have been saved in disk, for clarity, we will explore these analyses by re-plotting them here. We will define the constants of colors.
col_a = "#efaa90"
col_b = "#fbcf99"
We start by having a look at the number of connections per target gene. This is done by counting the number of instances of each gene in the “target” column of the tabular networks.
# num connections per target gene
boxplot(
main = "connections per\ntarget gene",
list(
c(EB_vs_LG$connections_per_tgt_gene$connects_per_tgt_gene_a),
c(EB_vs_LG$connections_per_tgt_gene$connects_per_tgt_gene_b)
),
names = c("\nEarly\nBlastula", "\nLate\nGastrula"),
col = c(col_a,col_b),
sub = paste0(
"Wilcox p.value ",
wilcox.test(
x = c(EB_vs_LG$connections_per_tgt_gene$connects_per_tgt_gene_a),
y = c(EB_vs_LG$connections_per_tgt_gene$connects_per_tgt_gene_b)
)$p.value
)
)
We can also have a look at the number of TFs that are self-regulated on each network by counting the number of times a given TF gene appears as a target of itself in the tabular networks.
# Number of self-reg TFs
barplot(
main = "Number of\nself-regulating TFs",
height = c(
EB_vs_LG$comparison_table$a[EB_vs_LG$comparison_table$metric == "Num Self-Regulated TFs"],
EB_vs_LG$comparison_table$b[EB_vs_LG$comparison_table$metric == "Num Self-Regulated TFs"]
),
names.arg = c("Early\nBlastula", "Late\nGastrula"),
col = c(col_a,col_b),
)
How different are these networks at the target level? We can dig into this by looking at how overlapped these networks are, and at the enriched GO terms in the exclusive and commonly-shared target genes of each network.
# number of shared targets
x <-
list(
EB = c(
EB_vs_LG$target_genes$targets_common_ab,
EB_vs_LG$target_genes$targets_exclusive_a
),
LG = c(
EB_vs_LG$target_genes$targets_common_ab,
EB_vs_LG$target_genes$targets_exclusive_b
)
)
# Helper function to display Venn diagram
display_venn <- function(x, ...){
library(VennDiagram)
grid.newpage()
venn_object <- venn.diagram(x, filename = NULL, ...)
grid.draw(venn_object)
}
# Display the plot directly in R:
display_venn(
x,
# Circles
lwd = 2,
lty = 'blank',
fill = c(col_a, col_b),
# Numbers
cex = .9,
fontface = "italic",
# Set names
category.names = c("EB" , "LG"),
cat.cex = 1,
cat.pos = 180,
cat.fontface = "bold",
cat.default.pos = "outer"
)
Here we have the enriched GO terms for the target genes exclusive to EB, the target genes exclusive to LG, and the common ones.
# GO barplots or just the words for them
EB_vs_LG$GOs_targets$GOtable$targets_exclusive_a
## GO.ID
## 1 GO:0045892
## 2 GO:0009953
## 3 GO:0045893
## 4 GO:0000122
## 5 GO:1904668
## 6 GO:0045944
## 7 GO:1904589
## 8 GO:1903715
## 9 GO:0034622
## 10 GO:0071481
## 11 GO:0060425
## 12 GO:0060027
## 13 GO:0048286
## 14 GO:0032436
## 15 GO:0014834
## 16 GO:0007362
## 17 GO:0021953
## 18 GO:0006457
## 19 GO:0035019
## 23 GO:0007476
## 24 GO:0021697
## 25 GO:0045648
## 26 GO:0007378
## 27 GO:1901987
## 28 GO:0048103
## 29 GO:0071786
## 30 GO:0030335
## Term
## 1 negative regulation of transcription, DNA-templated
## 2 dorsal/ventral pattern formation
## 3 positive regulation of transcription, DNA-templated
## 4 negative regulation of transcription by RNA polymerase II
## 5 positive regulation of ubiquitin protein ligase activity
## 6 positive regulation of transcription by RNA polymerase II
## 7 regulation of protein import
## 8 regulation of aerobic respiration
## 9 cellular protein-containing complex assembly
## 10 cellular response to X-ray
## 11 lung morphogenesis
## 12 convergent extension involved in gastrulation
## 13 lung alveolus development
## 14 positive regulation of proteasomal ubiquitin-dependent protein catabolic process
## 15 skeletal muscle satellite cell maintenance involved in skeletal muscle regeneration
## 16 terminal region determination
## 17 central nervous system neuron differentiation
## 18 protein folding
## 19 somatic stem cell population maintenance
## 23 imaginal disc-derived wing morphogenesis
## 24 cerebellar cortex formation
## 25 positive regulation of erythrocyte differentiation
## 26 amnioserosa formation
## 27 regulation of cell cycle phase transition
## 28 somatic stem cell division
## 29 endoplasmic reticulum tubular network organization
## 30 positive regulation of cell migration
## Annotated Significant Expected classicFisher
## 1 919 38 12.66 0.0000071
## 2 263 17 3.62 0.0000071
## 3 1202 43 16.56 0.0000162
## 4 625 23 8.61 0.0000193
## 5 13 4 0.18 0.0000228
## 6 958 30 13.20 0.0000234
## 7 108 9 1.49 0.0000474
## 8 30 5 0.41 0.0000513
## 9 1068 31 14.72 0.0000709
## 10 17 4 0.23 0.0000727
## 11 52 6 0.72 0.0000772
## 12 33 5 0.45 0.0000826
## 13 53 6 0.73 0.0000861
## 14 103 8 1.42 0.0000895
## 15 8 3 0.11 0.00014
## 16 20 4 0.28 0.00014
## 17 207 11 2.85 0.00015
## 18 209 11 2.88 0.00016
## 19 60 6 0.83 0.00017
## 23 289 13 3.98 0.00020
## 24 22 4 0.30 0.00021
## 25 23 4 0.32 0.00025
## 26 10 3 0.14 0.00029
## 27 522 22 7.19 0.00030
## 28 93 7 1.28 0.00030
## 29 11 3 0.15 0.00039
## 30 353 14 4.86 0.00041
EB_vs_LG$GOs_targets$GOtable$targets_exclusive_b
## GO.ID Term
## 1 GO:0003310 pancreatic A cell differentiation
## 2 GO:0042472 inner ear morphogenesis
## 4 GO:0048485 sympathetic nervous system development
## 5 GO:0045229 external encapsulating structure organization
## 6 GO:0021877 forebrain neuron fate commitment
## 7 GO:0030335 positive regulation of cell migration
## 8 GO:0045665 negative regulation of neuron differentiation
## 9 GO:0000122 negative regulation of transcription by RNA polymerase II
## 10 GO:0030500 regulation of bone mineralization
## 11 GO:0008347 glial cell migration
## 12 GO:0001764 neuron migration
## 13 GO:0021987 cerebral cortex development
## 14 GO:0014898 cardiac muscle hypertrophy in response to stress
## 15 GO:0003334 keratinocyte development
## 16 GO:0042130 negative regulation of T cell proliferation
## 17 GO:0030049 muscle filament sliding
## 18 GO:0051668 localization within membrane
## 19 GO:0021854 hypothalamus development
## 20 GO:0030199 collagen fibril organization
## 21 GO:0060021 roof of mouth development
## 29 GO:0061031 endodermal digestive tract morphogenesis
## 30 GO:0001839 neural plate morphogenesis
## Annotated Significant Expected classicFisher
## 1 6 4 0.08 0.00000045
## 2 150 12 1.99 0.00000078
## 4 19 5 0.25 0.00000396
## 5 347 17 4.61 0.00000420
## 6 12 4 0.16 0.00001382
## 7 353 16 4.69 0.00002121
## 8 250 13 3.32 0.00003130
## 9 625 22 8.30 0.00003278
## 10 65 8 0.86 0.00004611
## 11 74 7 0.98 0.00005641
## 12 187 14 2.48 0.00005925
## 13 130 9 1.73 0.00006045
## 14 17 4 0.23 0.00006305
## 15 7 3 0.09 0.00007786
## 16 37 5 0.49 0.00012
## 17 8 3 0.11 0.00012
## 18 551 19 7.32 0.00015
## 19 21 4 0.28 0.00015
## 20 21 4 0.28 0.00015
## 21 88 7 1.17 0.00017
## 29 9 3 0.12 0.00018
## 30 9 3 0.12 0.00018
EB_vs_LG$GOs_targets$GOtable$targets_common_ab
## GO.ID
## 1 GO:0051668
## 2 GO:0032525
## 3 GO:0045944
## 4 GO:0048025
## 5 GO:0000381
## 6 GO:0048147
## 7 GO:0008103
## 8 GO:0008285
## 9 GO:0035722
## 10 GO:0007430
## 11 GO:0000184
## 12 GO:0001745
## 13 GO:0000122
## 16 GO:0002181
## 17 GO:0048469
## 18 GO:0051128
## 19 GO:0021884
## 20 GO:0032482
## 21 GO:0006413
## 22 GO:1903715
## 23 GO:0007391
## 25 GO:0048264
## 26 GO:0035019
## 27 GO:0006614
## 28 GO:0015988
## 29 GO:0001708
## 30 GO:0046716
## Term
## 1 localization within membrane
## 2 somite rostral/caudal axis specification
## 3 positive regulation of transcription by RNA polymerase II
## 4 negative regulation of mRNA splicing, via spliceosome
## 5 regulation of alternative mRNA splicing, via spliceosome
## 6 negative regulation of fibroblast proliferation
## 7 oocyte microtubule cytoskeleton polarization
## 8 negative regulation of cell population proliferation
## 9 interleukin-12-mediated signaling pathway
## 10 terminal branching, open tracheal system
## 11 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay
## 12 compound eye morphogenesis
## 13 negative regulation of transcription by RNA polymerase II
## 16 cytoplasmic translation
## 17 cell maturation
## 18 regulation of cellular component organization
## 19 forebrain neuron development
## 20 Rab protein signal transduction
## 21 translational initiation
## 22 regulation of aerobic respiration
## 23 dorsal closure
## 25 determination of ventral identity
## 26 somatic stem cell population maintenance
## 27 SRP-dependent cotranslational protein targeting to membrane
## 28 energy coupled proton transmembrane transport, against electrochemical gradient
## 29 cell fate specification
## 30 muscle cell cellular homeostasis
## Annotated Significant Expected classicFisher
## 1 551 43 12.04 0.00000000000045
## 2 10 6 0.22 0.00000002048785
## 3 958 48 20.94 0.00000035355660
## 4 23 7 0.50 0.00000040984365
## 5 77 11 1.68 0.00000086856158
## 6 17 6 0.37 0.00000106012818
## 7 18 6 0.39 0.00000156095350
## 8 593 46 12.96 0.00000164491195
## 9 29 7 0.63 0.00000232979446
## 10 20 6 0.44 0.00000314044834
## 11 77 10 1.68 0.00000661803964
## 12 331 28 7.24 0.00000732077374
## 13 625 32 13.66 0.00000788494280
## 16 121 12 2.64 0.00001381648665
## 17 400 28 8.74 0.00001832157318
## 18 2658 111 58.10 0.00002283902845
## 19 41 7 0.90 0.00002677107716
## 20 42 7 0.92 0.00003152709924
## 21 155 13 3.39 0.00003698330703
## 22 30 6 0.66 0.00003998130526
## 23 114 11 2.49 0.00004064315736
## 25 10 4 0.22 0.00004254711440
## 26 60 8 1.31 0.00004601128098
## 27 60 8 1.31 0.00004601128098
## 28 20 5 0.44 0.00005754160947
## 29 227 25 4.96 0.00006058219898
## 30 64 8 1.40 0.00007376842136
How are these changes in target genes achieved? What is happening at the factor level?
For this we can look at the changes in centrality for different genes across networks.
# plots of correlation changes in centrality tfs and transdev
par(mfrow = c(1,2))
plot(
main = "Changes in TF centrality across networks",
x = EB_vs_LG$tf_centrality_across_networks$a,
y = EB_vs_LG$tf_centrality_across_networks$b,
xlab = "Early Blastula",
ylab = "Late Gastrula",
pch = 19,
cex = 0.75,
col = alpha("#E58745",0.25)
)
plot(
main = "Foldchange in centrality of TFs",
sort(
EB_vs_LG$tf_centrality_across_networks$b /
EB_vs_LG$tf_centrality_across_networks$a
),
col = alpha("#E58745",0.5),
ylab = "foldchange centrality"
)
par(mfrow = c(1,1))
In this case we see a large number of genes tend to acquire higher levels of centrality in LG compared to EB, as shown in the scatter plot and the plot of the foldchange.
And the same about trans-dev genes:
par(mfrow = c(1,2))
# same in transdev
plot(
main = "Changes in trans-dev centrality across networks",
x = EB_vs_LG$transdev_centrality_across_networks$a,
y = EB_vs_LG$transdev_centrality_across_networks$b,
xlab="Early Blastula",
ylab = "Late Gastrula",
pch = 19,
cex = 0.75,
col = alpha("#1A9A83",0.25)
)
plot(
main = "Foldchange in centrality of trans-devs",
sort(
EB_vs_LG$transdev_centrality_across_networks$b /
EB_vs_LG$transdev_centrality_across_networks$a
),
col = alpha("#1A9A83",0.5),
ylab = "foldchange centrality"
)
par(mfrow = c(1,1))
This is also reflected at the InvsOut-Degree of genes, what we call “gene behaviour”, and how it changes between networks. This can be visualised as cumulative density functions and/or boxplots.
eb_inout_ratio <-
pfla_EB_stats$In_Out_per_Gene$ratio[
pfla_EB_stats$In_Out_per_Gene$ratio > 0
]
lg_inout_ratio <-
pfla_LG_stats$In_Out_per_Gene$ratio[
pfla_LG_stats$In_Out_per_Gene$ratio > 0
]
plot(
seq(1:length(eb_inout_ratio)),
eb_inout_ratio,
main = "Gene behavior\n(types & amount\nof connections)",
col = col_a,
type = "l",
xlab = "",
ylab = "% emitting connections",
lwd = 2,
ylim = c(0,1)
)
lines(
seq(1:79),
lg_inout_ratio[1:79],
main = "Gene behavior\n(types & amt of connections)",
col = col_b,
lwd = 2
)
To have a better idea of what are these TF genes, and since we saw that there is a TF switch at the transcriptional level during development (see previous markdowns), we can showcase the changes in the network structure by looking at the centrality of the different genes by TF class.
In this case we use the function plot_centrailty_TFclass that grabs a named list with teh centrality values of TF genes grouped by class and creates a barplot. Colors are assigned based on the named vector with TFclasses and color.
## Centrality per TF class
par(mfrow = c(2,1))
plot_centrality_TFclass(
s = pfla_EB_stats$Centrality_per_TFclass,
f = allTFclasses_col,
# sub_s = topclasses,
main = "Centrality per TF Class - EB",
ylim = c(0.00005,0.00012)
)
plot_centrality_TFclass(
s = pfla_LG_stats$Centrality_per_TFclass,
f = allTFclasses_col,
# sub_s = topclasses,
main = "Centrality per TF Class - LG",
ylim = c(0.00005,0.00012)
)
par(mfrow = c(1,1))
Another way to check for differences between target genes in networks is looking at the presence of certain genes of interest in each. For example, we can check how many genes of larval development are being targeted in each of the networks. We see there is a large different in the proportion of larval genes that are embedded in the LG network compared to the EB network. This is hinting at the aforementioned switch towards larval development already taking place at the cis-regulatory level during gastrulation.
# number of post-gastrulation genes in network
barplot(
main="Post-gastrulation genes\nin network",
height=c(
EB_vs_LG[[11]][1]/length(EB_vs_LG$target_genes$targets_exclusive_a)*100,
EB_vs_LG[[11]][2]/length(EB_vs_LG$target_genes$targets_exclusive_b)*100
),
col = c(
col_a,
col_b
),
names=c("EB", "LG"),
ylab="gene percent",
ylim = c(0,65),
las=1
)
Finally, to check the differences between the networks of these developmental stages, we can use the output of ANANSE influence to check for the top factors. Below we re-run the code used to generate the influence plot in compareNetworks().
# influence
influ_tbl <- ananse_EB_to_LG
influ_tbl <- merge(influ_tbl,pfla_tfs_graph_analysis,by.x=1,by.y=1,all.x=T)
influ_tbl$TFclass[is.na(influ_tbl$TFclass)] <- " "
influ_tbl$col[is.na(influ_tbl$col)] <- "gray"
influ_tbl <- influ_tbl [rev(order(influ_tbl$sumScaled)),]
rownames(influ_tbl) <- NULL
# 'translate' the gene ids to known gene names using a small function
influ_tbl$genename <-
translate_ids(x = influ_tbl$factor,dict = pfla_genenames)
Here the plot of ANANSE influence. Colors indicate different TF classes. Where available, gene names have been added.
plot(
influ_tbl$factor_fc,
influ_tbl$sumScaled,
pch=19,
col=influ_tbl$col,
bg="black",
xlab="log2fold change of TF",
ylab="ANANSE influence score",
main="Main factors",
bty="n",
xlim=c(0,max(influ_tbl$factor_fc)+1)
)
text(
influ_tbl$factor_fc[1:20],
influ_tbl$sumScaled[1:20],
influ_tbl$genename[1:20],
pos = 3,
cex=0.85
)
And here is the plot of the graph of these TFs:
V(EB_vs_LG$influence_graph)$genename <-
translate_ids(V(EB_vs_LG$influence_graph)$name,pfla_genenames)
E(EB_vs_LG$influence_graph)$width <-
category_by_quantile(
E(EB_vs_LG$influence_graph)$prob,
newvalues = c(0.2,1,2,5)
)
plot(
main = "Influence network",
EB_vs_LG$influence_graph,
vertex.color = V(EB_vs_LG$influence_graph)$col,
vertex.label = V(EB_vs_LG$influence_graph)$genename,
edge.width = E(EB_vs_LG$influence_graph)$width,
edge.arrow.size = 0.2,
edge.color = rgb(0,0,0,0.1),
layout = layout_with_fr(EB_vs_LG$influence_graph)
)
Here there will be some code to subset the graphs and get the genes of each developmental layer.
Here there will be code to retrieve the subgraphs of genes from GRNs in other organisms.
We will save the data now:
save(
# Attributes
pfla_attributes_list,
pfla_tfs_graph_analysis,
pfla_tdhk,
pfla_tfeg,
pfla_funcat,
# Early Blastula
pfla_EB_nw,
pfla_EB_nw2,
pfla_EB_parsenetwork,
pfla_EB_graph,
pfla_EB_graph2,
pfla_EB_stats,
# Late Gastrula
pfla_LG_nw,
pfla_LG_nw2,
pfla_LG_parsenetwork,
pfla_LG_graph,
pfla_LG_graph2,
pfla_LG_stats,
# Network comparison
EB_vs_LG,
file = "outputs/rda/graph_analysis.rda"
)